Compute recording indices in database

This notebook computes indices for sounds (or soundscape recordings) in a pumilio database.

Required packages

Variable declarations

working_directory – temporary directory for processing recordings


In [7]:
# `os` is imported in this cell because the cell is placed above the notebook's
# import cells; without it a fresh "Restart Kernel -> Run All" raises NameError.
import os

# Scratch directory for decoded recordings. It is deleted and recreated during
# processing, so it must not point at a directory that holds anything else.
working_directory = os.path.join(os.environ['HOME'], "pymilio_temp/")

Import statements


In [1]:
%load_ext rpy2.ipython

In [2]:
import rpy2.robjects as ro

In [3]:
%%R

library('tuneR')
library('seewave')
library('soundecology')


/Users/Jake/code_sandbox/venv/lib/python3.5/site-packages/rpy2/robjects/functions.py:106: UserWarning: tuneR >= 1.0 has changed its Wave class definition.
Use updateWave(object) to convert Wave objects saved with previous versions of tuneR.

  res = super(Function, self).__call__(*new_args, **new_kwargs)

In [4]:
import pandas
import pandas.rpy.common as com

# new version not working...
#from rpy2.robjects import pandas2ri
#pandas2ri.activate()
#from rpy2.robjects import r


/Users/Jake/code_sandbox/venv/lib/python3.5/site-packages/ipykernel/__main__.py:2: FutureWarning: The pandas.rpy module is deprecated and will be removed in a future version. We refer to external packages like rpy2. 
See here for a guide on how to port your code to rpy2: http://pandas.pydata.org/pandas-docs/stable/r_interface.html
  from ipykernel import kernelapp as app

In [5]:
import os.path
from shutil import rmtree
import subprocess

In [6]:
import pyprind

In [9]:
from Pymilio import database

Connect to database


In [10]:
# Shared connection object; the database helper functions and the processing
# loop in this notebook all go through this module-level `pumilio_db`.
# NOTE(review): presumably the password is taken from the option file named by
# `read_default_file` rather than being hardcoded here -- confirm against Pymilio.
pumilio_db = database.Pymilio_db_connection(user='pumilio',
                                            database='pumilio',
                                            read_default_file='~/.my.cnf.pumilio')

get all sounds


In [11]:
# `sounds` is iterated by key in the processing loop and indexed as
# `sounds[sound]` to obtain the path of the recording to convert.
# NOTE(review): `source_directory` is not assigned in any cell visible in this
# notebook; unless it is defined elsewhere, a fresh Restart & Run All fails
# here with NameError -- confirm where it is supposed to be set.
sounds = pumilio_db.get_sound_paths(prepath=source_directory)

Function definitions

database functions


In [12]:
def insert_row(table, columns, values):
    """Forward an insert request to the shared `pumilio_db` connection.

    `columns` and `values` are bundled into a single `(columns, values)` tuple
    and handed to `pumilio_db.insert` through its `values` keyword; `table` is
    passed positionally.
    """
    column_value_pair = (columns, values)
    pumilio_db.insert(table, values=column_value_pair)

In [13]:
def update_row(table, values, where):
    """Forward an update request to the shared `pumilio_db` connection.

    All three arguments are passed through unchanged as the keyword arguments
    `table`, `values` and `where` of `pumilio_db.update`.
    """
    update_arguments = {'table': table, 'values': values, 'where': where}
    pumilio_db.update(**update_arguments)

index calculation functions


In [14]:
# acoustic complexity index
def calculateACI(sound):
    """Run the R `acoustic_complexity` call on a wav file.

    `sound` is the path of the wav file. It is read into an R variable that is
    also called `sound`, but only when no R object of that name exists yet, so
    an already-loaded recording is reused.

    Returns a tuple `(results, language, command)`: a dict with the first
    element of each ACI column, the language name ('R') and the exact R call
    that was evaluated.
    """
    language = 'R'
    command = "acoustic_complexity(sound, min_freq = NA, max_freq = NA, j = 5, fft_w = 512)"

    # read the wav file into R unless a `sound` object is already present
    load_template = """capture.output(if (!(exists("sound"))) {{ sound <- readWave("{0}") }}, file=NULL)"""
    ro.r(load_template.format(sound))

    # evaluate the index in R and bind the result to `ACI`
    run_template = """capture.output(ACI <- {0}, file=NULL)"""
    ro.r(run_template.format(command))

    # bring the R object over to the python side
    ACI = com.load_data('ACI')

    columns = (
        'AciTotAll_left',
        'AciTotAll_right',
        'AciTotAll_left_bymin',
        'AciTotAll_right_bymin',
    )
    results = {column: ACI[column][0] for column in columns}
    return results, language, command

In [15]:
# acoustic diversity index
def calculateADI(sound):
    """Run the R `acoustic_diversity` call on a wav file.

    `sound` is the path of the wav file. It is read into an R variable that is
    also called `sound`, but only when no R object of that name exists yet.

    Returns a tuple `(results, language, command)`. `results` holds the first
    element of the left/right ADI columns plus string renderings of the band
    values and band-range labels; `language` is 'R'; `command` is the exact R
    call that was evaluated.
    """
    language = 'R'
    command = "acoustic_diversity(sound, max_freq = 12000, db_threshold = -50, freq_step = 1000, shannon = TRUE)"

    # read the wav file into R unless a `sound` object is already present
    load_template = """capture.output(if (!(exists("sound"))) {{ sound <- readWave("{0}") }}, file=NULL)"""
    ro.r(load_template.format(sound))

    # evaluate the index in R and bind the result to `ADI`
    run_template = """capture.output(ADI <- {0}, file=NULL)"""
    ro.r(run_template.format(command))

    # bring the R object over to the python side
    # (the pandas2ri-based conversion was noted as "not working", so the
    # pandas.rpy loader is used instead)
    ADI = com.load_data('ADI')

    results = {}
    for channel in ('left', 'right'):
        results['adi_' + channel] = ADI['adi_' + channel][0]
        results[channel + '_band_values'] = str(ADI[channel + '_band_values'])
        # render the labels as a quoted, bracketed list
        range_labels = ADI[channel + '_bandrange_values']
        results[channel + '_bandrange_values'] = "['{0}']".format("', '".join(range_labels))
    return results, language, command

In [16]:
# acoustic evenness index
def calculateAEI(sound):
    """Run the R `acoustic_evenness` call on a wav file.

    `sound` is the path of the wav file. It is read into an R variable that is
    also called `sound`, but only when no R object of that name exists yet, so
    an already-loaded recording is reused.

    Returns a tuple `(results, language, command)`: a dict with the first
    element of the `aei_left` / `aei_right` columns, the language name ('R')
    and the exact R call that was evaluated.
    """
    # specify command with all arguments and language used for computation
    language = 'R'
    # The call is now a complete R expression. Previously its closing
    # parenthesis was missing here and patched into the template below, so the
    # command handed back to the caller (and stored by it) was malformed.
    # NOTE: rows already stored with the truncated command text will not match
    # this corrected string.
    command = "acoustic_evenness(sound, max_freq = 12000, db_threshold = -50, freq_step = 1000)"
    # load wav file in R environment
    r_command = """capture.output(if (!(exists("sound"))) {{ sound <- readWave("{0}") }}, file=NULL)""".format(sound)
    ro.r(r_command);
    # run calculation in R environment (same template shape as the other indices)
    r_command = """capture.output(AEI <- {0}, file=NULL)""".format(command)
    ro.r(r_command);
    # load results into python environment
    AEI = com.load_data('AEI')

    results = {
        'aei_left':AEI['aei_left'][0],
        'aei_right':AEI['aei_right'][0]
    }
    return results, language, command

In [17]:
# bioacoustic index
def calculateBAI(sound):
    """Run the R `bioacoustic_index` call on a wav file.

    `sound` is the path of the wav file. It is read into an R variable that is
    also called `sound`, but only when no R object of that name exists yet.

    Returns a tuple `(results, language, command)`: a dict with the first
    element of the `left_area` / `right_area` columns, the language name ('R')
    and the exact R call that was evaluated.
    """
    language = 'R'
    command = "bioacoustic_index(sound, min_freq = 2000, max_freq = 8000, fft_w = 512)"

    # read the wav file into R unless a `sound` object is already present
    load_template = """capture.output(if (!(exists("sound"))) {{ sound <- readWave("{0}") }}, file=NULL)"""
    ro.r(load_template.format(sound))

    # evaluate the index in R and bind the result to `BAI`
    run_template = """capture.output(BAI <- {0}, file=NULL)"""
    ro.r(run_template.format(command))

    # bring the R object over to the python side
    BAI = com.load_data('BAI')

    results = {column: BAI[column][0] for column in ('left_area', 'right_area')}
    return results, language, command

In [18]:
# ndsi index (normalized difference soundscape index)
def calculateNDSI(sound):
    """Run the R `ndsi` call on a wav file.

    `sound` is the path of the wav file. It is read into an R variable that is
    also called `sound`, but only when no R object of that name exists yet.

    Returns a tuple `(results, language, command)`: a dict with the first
    element of the ndsi / biophony / anthrophony columns for both channels,
    the language name ('R') and the exact R call that was evaluated.
    """
    language = 'R'
    command = "ndsi(sound, fft_w = 1024, anthro_min = 1000, anthro_max = 2000, bio_min = 2000, bio_max = 12000)"

    # read the wav file into R unless a `sound` object is already present
    load_template = """capture.output(if (!(exists("sound"))) {{ sound <- readWave("{0}") }}, file=NULL)"""
    ro.r(load_template.format(sound))

    # evaluate the index in R and bind the result to `NDSI`
    run_template = """capture.output(NDSI <- {0}, file=NULL)"""
    ro.r(run_template.format(command))

    # bring the R object over to the python side
    NDSI = com.load_data('NDSI')

    columns = (
        'ndsi_left',
        'ndsi_right',
        'biophony_left',
        'anthrophony_left',
        'biophony_right',
        'anthrophony_right',
    )
    results = {column: NDSI[column][0] for column in columns}
    return results, language, command

In [19]:
# soundscapespec
def calculateSSS(sound):
    """Run the R `soundscapespec` call on a wav file.

    `sound` is the path of the wav file. It is read into an R variable that is
    also called `sound`, but only when no R object of that name exists yet.

    Returns a tuple `(results, language, command)`. `results` has the single
    key 'frequency_power': the string form of a list holding every value of
    the `amplitude` column formatted with four decimals. `language` is 'R' and
    `command` is the exact R call that was evaluated.
    """
    language = 'R'
    command = """soundscapespec(sound, wl = 1024, wn = "hamming", ovlp = 50, plot = FALSE)"""

    # read the wav file into R unless a `sound` object is already present
    load_template = """capture.output(if (!(exists("sound"))) {{ sound <- readWave("{0}") }}, file=NULL)"""
    ro.r(load_template.format(sound))

    # evaluate the spectrum in R and bind the result to `SSS`
    run_template = """capture.output(SSS <- {0}, file=NULL)"""
    ro.r(run_template.format(command))

    # bring the R object over to the python side
    SSS = com.load_data('SSS')

    amplitudes = SSS['amplitude'].as_matrix()
    formatted_amplitudes = ['{0:.4f}'.format(value) for value in amplitudes]
    results = {'frequency_power': str(formatted_amplitudes)}
    return results, language, command

testing...


In [20]:
def log_process(string):
    """Append `string` as one line to the process log.

    The log lives at `$HOME/Desktop/process_log/process_log.txt`. The directory
    (including any missing parent such as `Desktop`) is created on demand.

    Raises KeyError if the HOME environment variable is not set, and OSError
    if the directory or file cannot be created or written.
    """
    log_directory = os.path.join(os.environ['HOME'], "Desktop/process_log/")
    # makedirs also creates missing parents and tolerates an existing directory
    os.makedirs(log_directory, exist_ok=True)
    log_path = os.path.join(log_directory, "process_log.txt")
    # append mode creates the file when it is missing, so no exists() branch is
    # needed; the context manager closes the handle even if the write fails
    with open(log_path, 'a') as log_file:
        log_file.write(string + '\n')

In [21]:
#results = calculateBAI("/Users/Jake/Desktop/test/160224-160000.wav")

In [22]:
#sounds = {'129': '/Users/Jake/Desktop/test/160224-160000.wav'}

conversion functions


In [23]:
def convertFLACtoWAV(flacfile, wavfile):
    """Decode `flacfile` to `wavfile` with the external `flac` program.

    The module-level `working_directory` is emptied first: if it exists it is
    removed with everything in it, and it is then created again. The `flac`
    executable is invoked as `flac -d <flacfile> -o <wavfile>`;
    `subprocess.check_output` raises CalledProcessError on a non-zero exit.
    """
    # start from a fresh, empty working directory
    directory_present = os.path.exists(working_directory)
    if directory_present:
        rmtree(working_directory)
    os.mkdir(working_directory)

    decode_command = ["flac", "-d", flacfile, "-o", wavfile]
    subprocess.check_output(decode_command)

Process all sounds

Check for and calculate missing indices for all sounds in the database.


In [24]:
# list of supported indices
#indices = ['ACI', 'ADI', 'AEI', 'BAI', 'NDSI', 'SSS']
indices = ['SSS']

# explicit dispatch table (replaces eval() on a formatted string): maps each
# index name to the function that computes it
index_calculators = {
    'ACI': calculateACI,
    'ADI': calculateADI,
    'AEI': calculateAEI,
    'BAI': calculateBAI,
    'NDSI': calculateNDSI,
    'SSS': calculateSSS,
}

# the bar is advanced once per (sound, index) pair, so size it accordingly
progress_bar = pyprind.ProgBar(len(sounds) * len(indices), bar_char='█', title='Process progress', monitor=True, stream=1, width=50)

for sound in sounds:

    # compute all indices for the current sound
    for index in indices:
        # update progress bar
        progress_bar.update(item_id = sound+" -> "+index)

        tablename = 'Index' + index
        # NOTE: the where-clauses below are built by string formatting; the
        # values come from this notebook and the database itself, not from
        # user input, but they are not escaped.
        sound_filter = "Sound = '{0}'".format(sound)
        row = pumilio_db.fetch_as_pandas_df(table=tablename, where=sound_filter)

        # only sounds without any row for this index are processed; one
        # existing row means "already done", several rows are left untouched
        if len(row) != 0:
            continue

        # create a wav file for analysis if one does not already exist in the working directory
        # splitext removes only the extension; str.strip('.flac') would strip
        # any leading/trailing '.', 'f', 'l', 'a', 'c' characters from the name
        filename = os.path.splitext(os.path.basename(sounds[sound]))[0]
        wavfile = os.path.join(working_directory, filename) + '.wav'
        if not os.path.exists(wavfile):
            convertFLACtoWAV(sounds[sound], wavfile)

        # compute index
        results, language, command = index_calculators[index](wavfile)

        # insert the row only after the calculation succeeded, so a failed
        # calculation does not leave a row that makes later runs skip the sound
        insert_row(table=tablename, columns='Sound', values=sound)
        # update each column in database for each part of the index returned
        for item in results:
            update_row(table=tablename, values=(item, results[item]), where=sound_filter)

        # check if command already exists in the 'Analyses' table
        command_filter = "command = '{0}'".format(command)
        command_row = pumilio_db.fetch_as_pandas_df(table='Analyses', where=command_filter)
        if len(command_row) == 0:
            # add the command to the 'Analyses' table if it does not exist
            insert_row(table='Analyses', columns='command', values=command)
            # get id of new row
            command_row = pumilio_db.fetch_as_pandas_df(table='Analyses', where=command_filter)
            command_ID = command_row['ID'][0]
            # update all other columns
            id_filter = "ID = '{0}'".format(command_ID)
            update_row(table='Analyses', values=('name', index), where=id_filter)
            update_row(table='Analyses', values=('language', language), where=id_filter)
        elif len(command_row) == 1:
            # if the command exists, get the command_id
            command_ID = command_row['ID'][0]
        else:
            # ambiguous: several 'Analyses' rows share this command. Report it
            # and leave command_ID unset for this row instead of silently
            # reusing an undefined or stale ID from an earlier iteration.
            print("WARNING: {0} rows in the Analyses table match the command for index {1}; "
                  "command_ID not set for Sound {2}".format(len(command_row), index, sound))
            continue

        # update the command_id for the index calculation
        update_row(table=tablename, values=('command_ID', command_ID), where=sound_filter)

    # cleanup R environment (only when a recording was actually loaded, which
    # avoids an R "object not found" warning for skipped sounds)
    ro.r("if (exists('sound')) remove('sound')")

    # cleanup working directory
    if os.path.exists(working_directory):
        rmtree(working_directory)

print('\n')
progress_bar.update()
print(progress_bar)


Process progress
0%                                              100%
[██████████████████████████████████████████████████] | ETA: 00:00:00 | Item ID: 153 -> SSS
Total time elapsed: 00:16:53



Total time elapsed: 00:16:59
Title: Process progress
  Started: 06/03/2016 11:40:52
  Finished: 06/03/2016 11:57:51
  Total time elapsed: 00:16:59
  CPU %: 89.60
  Memory %: 15.09